library(tidyverse)
## ── Attaching packages ──────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the survey records with an explicit column specification so that the
# parsed types never depend on readr's type guessing and no specification
# message is printed. The specification mirrors the one readr previously
# inferred for this file.
surveys_complete <- read_csv(
  "data/surveys_complete.csv",
  col_types = cols(
    record_id = col_double(),
    month = col_double(),
    day = col_double(),
    year = col_double(),
    plot_id = col_double(),
    species_id = col_character(),
    sex = col_character(),
    hindfoot_length = col_double(),
    weight = col_double(),
    genus = col_character(),
    species = col_character(),
    taxa = col_character(),
    plot_type = col_character()
  )
)

# Print the tibble to inspect the first rows and the column types.
surveys_complete
## # A tibble: 30,463 x 13
##    record_id month   day  year plot_id species_id sex   hindfoot_length weight
##        <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
##  1       845     5     6  1978       2 NL         M                  32    204
##  2      1164     8     5  1978       2 NL         M                  34    199
##  3      1261     9     4  1978       2 NL         M                  32    197
##  4      1756     4    29  1979       2 NL         M                  33    166
##  5      1818     5    30  1979       2 NL         M                  32    184
##  6      1882     7     4  1979       2 NL         M                  32    206
##  7      2133    10    25  1979       2 NL         F                  33    274
##  8      2184    11    17  1979       2 NL         F                  30    186
##  9      2406     1    16  1980       2 NL         F                  33    184
## 10      3000     5    18  1980       2 NL         F                  31     87
## # … with 30,453 more rows, and 4 more variables: genus <chr>, species <chr>,
## #   taxa <chr>, plot_type <chr>
# An empty canvas: the data is bound, but nothing is mapped or drawn yet.
ggplot(surveys_complete)

# Map weight to x and hindfoot length to y; no layer is added yet.
ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
)

# Add a point layer to turn the mapping into a scatter plot.
ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
) +
  geom_point()

# Keep the base plot (data + aesthetic mapping) in a variable for reuse.
surveys_plot <- ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
)

# Draw the stored plot with a point layer on top.
surveys_plot +
  geom_point()

# Challenge 1 (optional) ----

# geom_hex() needs the hexbin package. Install it only when it is missing, so
# that re-running this code does not reinstall the package (and does not need
# network access) every time.
if (!requireNamespace("hexbin", quietly = TRUE)) {
  install.packages("hexbin")
}
## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library(hexbin)

# Hexagonal binning layered on the stored weight vs. hindfoot length plot.
surveys_plot +
  geom_hex()

# hexagonal strengths: makes it easier to see where there is a higher concentration of counts

# hexagonal weaknesses: each hexagon covers a larger area than an individual point, so the plot is less precise than a scatter plot of small circles, which shows more accurate information about each observation

# Building your plots iteratively ----

# Step 1: plain scatter plot of weight against hindfoot length.
ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
) +
  geom_point()

# Step 2: make the points mostly transparent.
ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
) +
  geom_point(alpha = 0.1)

# Step 3: same transparency, with every point drawn in a fixed blue.
ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
) +
  geom_point(color = "blue", alpha = 0.1)

# Step 4: color is now mapped to species_id instead of being a constant.
ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
) +
  geom_point(mapping = aes(color = species_id), alpha = 0.1)

# Challenge 2 ----

# Weight for each species, with the points colored by plot type.
ggplot(
  surveys_complete,
  aes(x = species_id, y = weight)
) +
  geom_point(mapping = aes(color = plot_type))

# this is a good way to show this type of data, since you are able to clearly see the plot type for each species

# Boxplot ----

# Distribution of weight within each species as a boxplot.
ggplot(
  surveys_complete,
  aes(x = species_id, y = weight)
) +
  geom_boxplot()

# Same boxplot drawn with alpha = 0, plus the jittered raw observations in
# semi-transparent "tomato".
ggplot(
  surveys_complete,
  aes(x = species_id, y = weight)
) +
  geom_boxplot(alpha = 0) +
  geom_jitter(color = "tomato", alpha = 0.3)

# Challenge 3 ----

# Violin plot of weight per species.
ggplot(
  surveys_complete,
  aes(x = species_id, y = weight)
) +
  geom_violin()

# Same violin plot with the weight axis on a log10 scale.
ggplot(
  surveys_complete,
  aes(x = species_id, y = weight)
) +
  geom_violin() +
  scale_y_log10()

# Hindfoot length per species: boxplot with the jittered observations on top.
ggplot(
  surveys_complete,
  aes(x = species_id, y = hindfoot_length)
) +
  geom_boxplot() +
  geom_jitter()

# Same plot, with the jittered observations drawn in blue.
ggplot(
  surveys_complete,
  aes(x = species_id, y = hindfoot_length)
) +
  geom_boxplot() +
  geom_jitter(color = "blue")

# Plotting Time Series Data ----

# Number of records for every year/genus combination (count column is `n`).
yearly_counts <- count(surveys_complete, year, genus)

# All genera drawn as one line, because nothing separates them yet.
ggplot(
  yearly_counts,
  aes(x = year, y = n)
) +
  geom_line()

# One line per genus via the group aesthetic.
ggplot(
  yearly_counts,
  aes(x = year, y = n, group = genus)
) +
  geom_line()

# One line per genus, this time distinguished by color.
ggplot(
  yearly_counts,
  aes(x = year, y = n, color = genus)
) +
  geom_line()

# Integrating the pipe operator with ggplot2 ----

# Pipe the counts into ggplot(); the piped tibble becomes the `data` argument.
yearly_counts %>%
  ggplot(aes(x = year, y = n, color = genus)) +
  geom_line()

# Do the counting and the plotting in a single pipeline and keep the result.
yearly_counts_graph <- surveys_complete %>%
  count(year, genus) %>%
  ggplot(aes(x = year, y = n, color = genus)) +
  geom_line()

# Display the stored plot.
yearly_counts_graph

# Faceting ----

# Yearly counts split into one panel per genus.
ggplot(
  yearly_counts,
  aes(x = year, y = n)
) +
  geom_line() +
  facet_wrap(vars(genus))

# Number of records for every year/genus/sex combination.
yearly_sex_counts <- count(surveys_complete, year, genus, sex)

# One panel per genus, with a separately colored line for each sex.
ggplot(
  yearly_sex_counts,
  aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(vars(genus))

# Grid layout: sex defines the rows and genus defines the columns.
ggplot(
  yearly_sex_counts,
  aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_grid(rows = vars(sex), cols = vars(genus))

# Facet by rows only: a single column with one row per genus.
ggplot(
  yearly_sex_counts,
  aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_grid(rows = vars(genus))

# Facet by columns only: a single row with one column per genus.
ggplot(
  yearly_sex_counts,
  aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_grid(cols = vars(genus))

# ggplot2 Themes ----

# Counts per genus and sex, drawn with the black-and-white theme.
ggplot(
  yearly_sex_counts,
  aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  theme_bw()

# Mean weight for every year/species combination. `.groups = "drop"` returns
# an ungrouped tibble, so later steps do not inherit a leftover grouping by
# `year`, and summarise() no longer prints its regrouping message.
yearly_weight <- surveys_complete %>%
  group_by(year, species_id) %>%
  summarize(avg_weight = mean(weight), .groups = "drop")

# One panel per species showing the average weight over the years.
ggplot(data = yearly_weight, mapping = aes(x = year, y = avg_weight)) +
  geom_line() +
  facet_wrap(vars(species_id)) +
  theme_bw()

# Customization ----

# Add a title and readable axis labels.
ggplot(
  yearly_sex_counts,
  aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw()

# Set the size of all text elements to 16.
ggplot(
  yearly_sex_counts,
  aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    title = "Observations through time",
    x = "Years of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(text = element_text(size = 16))

# Rotate the x axis labels by 90 degrees, set the axis text color and size,
# and put the facet strip labels in italics.
ggplot(
  yearly_sex_counts,
  aes(x = year, y = n, color = sex)
) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Years of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      colour = "grey20",
      size = 12,
      angle = 90,
      hjust = 0.5,
      vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 16)
  )

# Reusable theme settings: axis text in "grey20" at size 12 (x labels rotated
# by 90 degrees) and all other text at size 16.
grey_theme <- theme(
  axis.text.x = element_text(
    colour = "grey20",
    size = 12,
    angle = 90,
    hjust = 0.5,
    vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12),
  text = element_text(size = 16)
)

# Apply the stored theme to a boxplot of hindfoot length per species.
ggplot(
  data = surveys_complete,
  mapping = aes(x = species_id, y = hindfoot_length)
) +
  geom_boxplot() +
  grey_theme

# Challenge 4 ----